
###############################################
# Figures 10, 11.Individal Level Correlations #
#     with IRT and Pan & Xu's categorization  #
###############################################


###
rm(list=ls(all=TRUE)) 
load("data/sample10K.RData")
load("output/result_irt.RData")

## ## income
d$inc<-d$income
d$inc[which(d$inc%in%c(1,2))]<-1.5 # 0-25K, 25-50K --> 0-50K
d$inc[which(d$inc%in%c(3,4,5))]<-3.5 # 50-75K, 75-100K, 100-150K --> 50-150K

## ## education
d[which(d$educ==1),]$educ <- 2

###latent score & se
library(hIRT)
lv1 <- latent_scores(zuobiao_m1)
colnames(lv1) <- c("lt1_est", "lt1_se")
lv1$lt1_est <- lv1$lt1_est * (-1) ##flip the sign

lv2 <- latent_scores(zuobiao_m2)
colnames(lv2) <- c("lt2_est", "lt2_se")
lv2$lt2_est <- lv2$lt2_est * (-1)

lv3 <- latent_scores(zuobiao_m3)
colnames(lv3) <- c("lt3_est", "lt3_se")
lv3$lt3_est <- lv3$lt3_est * (-1)

d_lt_irt <- cbind(d, lv1, lv2, lv3)

set.seed(02138)
ndim <- 3

mean_boot_irt <- function(X, nsims){
  ##unique values of covariates
  nX <- sort(unique(d_lt_irt[,X]))
  
  ##bootstrap
  output <- sapply(1:nsims, function(k){ 
  lt1_mean <- lt2_mean <- lt3_mean <- rep(NA, length(nX))
  for (i in 1:length(nX)){
  subset_d <- subset(d_lt_irt, d_lt_irt[,X] == nX[i])
  obs <- sample(1:nrow(subset_d), nrow(subset_d), replace = TRUE)
  lt1_mean[i] <- mean(sapply(obs, function(j){
              rnorm(1, mean = subset_d[j, "lt1_est"], sd = subset_d[j, "lt1_se"])
              }))
  lt2_mean[i] <- mean(sapply(obs, function(j){
    rnorm(1, mean = subset_d[j, "lt2_est"], sd = subset_d[j, "lt2_se"])
  }))
  lt3_mean[i] <- mean(sapply(obs, function(j){
    rnorm(1, mean = subset_d[j, "lt3_est"], sd = subset_d[j, "lt3_se"])
  }))
  }
  lt_mean <- c(lt1_mean, lt2_mean, lt3_mean)
  return(lt_mean)})
  
  ###summary###
  result <-array(NA,dim = c(length(nX), ndim, 4)) ## mean, se, CI_l, CI_u 
  point_est <- apply(output, MARGIN = 1, mean)
  sd <- apply(output, MARGIN = 1, sd)
  ci_lower <- apply(output, MARGIN = 1, quantile, c(0.025))
  ci_upper <- apply(output, MARGIN = 1, quantile, c(0.975))
  n <- length(nX)
  for (l in 1:length(nX)){
    result[l,,] <- rbind(c(point_est[l], sd[l], ci_lower[l], ci_upper[l]),
                         c(point_est[l+n], sd[l+n], ci_lower[l+n], ci_upper[l+n]),
                         c(point_est[l+2*n], sd[l+2*n], ci_lower[l+2*n], ci_upper[l+2*n]))
  }
  
  return(result)
}

##running the bootstrapping
nsims <- 200
ses.educ.irt <- mean_boot_irt(X = "educ", nsims = nsims)
ses.age.irt <- mean_boot_irt(X = "age", nsims = nsims)
ses.female.irt <- mean_boot_irt(X = "female", nsims = nsims)
ses.income.irt <- mean_boot_irt(X = "inc", nsims = nsims)
save(lv1, lv2, lv3, d_lt_irt, ses.female.irt, ses.educ.irt, ses.income.irt, ses.age.irt,
     file="output/result_irt_ind.RData")


####################################
## IRT versions of Figures 10, 11 ## 
## Individual-level Correlation   ##
####################################

rm(list=ls(all=TRUE)) 
load("output/result_irt_ind.RData")

mycol = c(1, "gray50", "gray80")
mypch = c(16, 17, 15)

## Education
pdf("graphs/ses_irt_educ.pdf")
output <- ses.educ.irt
offset <- seq(-0.15,0.15,length.out=3)
par(mar=c(7.5,4.5,2,2))
plot(1, type="n",ylab="", xlab="Education", ylim=c(-0.6,0.6), xlim=c(0.5,3.5),
     cex.lab=1.5, cex.axis=1.5, xaxt="n")
abline(h=0)
axis(1,at=1:3, labels=c("Below\nCollege","College\n","Above\nCollege"),
     cex.lab = 1.5, cex.axis=1.2, tick=FALSE,line=1)
axis(1,at=1:3, labels = NA, tick = TRUE)
for(i in seq(-2,2,by=0.05)) abline(h=i,col="#AAAAAA50",lwd=1)
for(i in seq(-0.5,3.5,by=0.25)) abline(v=i,col="#AAAAAA50",lwd=1)
n<-dim(output)[1]
for (k in 1:3) { # four latent variables
  for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,3],output[i,k,4]),lwd=4, col = mycol[k])}
  points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = mycol[k])
}
legend("topleft", legend = c("Political Liberalism",
                             "Pro-Market/Non-Traditional Values",
                             "Nationalism * (-1)"),
       col=mycol, pch=mypch, lty=1, bty = "n", lwd = 3, cex = 1.3)
title(main = "IRT: Original Model", line = -31.5)
graphics.off()


## Income
pdf("graphs/ses_irt_income.pdf")
output <- ses.income.irt
offset <- seq(-0.15,0.15,length.out=3)
par(mar=c(7.5,4.5,2,2))
plot(1, type="n",ylab="", xlab="Annual Income", ylim=c(-0.6,0.8), xlim=c(0.5,4.5),
     cex.lab=1.5, cex.axis=1.5, xaxt="n")
abline(h=0)
axis(1,at=1:4, labels=c("0-50K","50-150K","150-300K",">300K"), cex.axis=1.5,tick=F,line=0)
axis(1,at=1:4, labels = NA, tick = TRUE)
for(i in seq(-2,2,by=0.05)) abline(h=i,col="#AAAAAA50",lwd=1)
for(i in seq(-0.5,4.5,by=0.25)) abline(v=i,col="#AAAAAA50",lwd=1)
n<-dim(output)[1]
for (k in 1:3) { # three latent variables
  for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,3],output[i,k,4]),lwd=4, col = mycol[k])}
  points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = mycol[k])
}
legend("topleft", legend = c("Political Liberalism",
                             "Pro-Market/Non-Traditional Values",
                             "Nationalism * (-1)"),
       col=mycol, pch=mypch, lty=1, bty = "n", lwd = 3, cex = 1.3)
title(main = "IRT: Original Model", line = -31.5)
graphics.off()



## Age
pdf("graphs/ses_irt_age.pdf",width = 20)
output <- ses.age.irt
legends = c("Political Liberalism","Pro-Market/Non-Traditional Values","Nationalism * (-1)")
par(mar=c(15,4.5,2,2), mfcol = c(1,3))
for (k in 1:3) {
  plot(1, type="n",ylab="", xlab="Age", ylim=c(-0.6,0.6), xlim=c(-1,45),
       cex.lab=2.5, cex.axis=2, xaxt="n")
  abline(h=0)
  axis(1,at=seq(1,43, by = 3), labels=seq(18, 60, by =3),
       cex.axis= 2,tick=TRUE,line=0)
  for(i in seq(-2,2,by=0.05)) abline(h=i,col="#AAAAAA50",lwd=1)
  for(i in seq(-2,45,by=3)) abline(v=i,col="#AAAAAA50",lwd=1)
  n<-dim(output)[1]
  for (i in 1:n) {lines(c(i,i),c(output[i,k,3],output[i,k,4]),
                        lwd= 4, col = 1)}
  points(c(1:n),output[,k,1],pch=mypch[k],cex=2, col = 1)
  title(legends[k], line = -3, cex.main = 2.5)
}
title(main = "IRT: Original Model", line = -48, cex.main =3, adj = 1)
graphics.off()
